/*
 * Routines for dealing with Agents
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netinet/in.h>
#include <unistd.h>

#include "libfma.h"
#include "lf_fms_comm.h"
#include "lf_fma_comm.h"
#include "lf_channel.h"
#include "lf_fabric.h"
#include "lf_scheduler.h"
#include "lf_alert.h"
#include "lf_topo_map.h"
#include "lf_product_def.h"
#include "lf_fma_flags.h"
#include "libmyri.h"

#include "fms.h"
#include "fms_error.h"
#include "fms_io.h"
#include "fms_fma.h"
#include "fms_fma_map.h"
#include "fms_notify.h"
#include "fms_fabric.h"
#include "fms_fabric_delta.h"
#include "fms_resolve.h"
#include "fms_state.h"
#include "fms_settings.h"

static void reset_fma_receive(struct lf_channel *chp);
static void fms_fma_start_message(struct lf_channel *chp);
static void fms_fma_finish_message(struct lf_channel *chp);
static void get_fma_ident(struct fms_fma_desc *adp,
			  struct fma_fms_ident_msg *idp);
static void get_fma_info(struct fms_fma_desc *adp,
			 struct fma_fms_host_msg *hmp);
static void fms_fma_got_map(struct fms_fma_desc *adp, struct lf_topo_map *topo);
static void fma_hangup(struct lf_channel *chp);
static void check_fma_connections(void *);
static void fms_reconcile_nic_info(struct lf_host *hp,
                                   struct fma_fms_host_msg *hmp);
static void fms_move_nic_links(struct lf_nic *onicp, struct lf_nic *nnicp);
static void fms_fma_mapping_timeout(void *vadp);
static void fms_fma_nic_error(struct fms_fma_desc *adp,
			      struct fma_fms_nic_error_msg *msg);
static void fms_fma_nic_badcrc(struct fms_fma_desc *adp,
    			       struct fma_fms_nic_badcrc_msg *msg);
static void fms_fma_send_settings(struct fms_fma_desc *adp);
static void fms_fma_reports_invalid_map(struct fms_fma_desc *adp,
  struct fma_fms_map_invalid_msg *msg);
static void fms_free_adp(struct fms_fma_desc *adp);
static int fms_fma_try_request_map(struct fms_fma_desc *adp);

/*
 * Allocate the global FMA bookkeeping structure and hook it into F.
 * Returns 0 on success, -1 if the allocation fails.
 */
int
init_fma_vars()
{
  struct fms_fma_info *info;

  /* an allocation failure lands on the "except" label below */
  LF_CALLOC(info, struct fms_fma_info, 1);

  F.fma_info = info;
  return 0;

 except:
  return -1;
}

/*
 * Initialize the FMA subsystem.
 *
 * When the "preferred_mapper" setting is non-empty it is split on commas
 * and the resulting word list is saved in F.fma_info.  A one-time check of
 * FMA connections is then scheduled.
 *
 * Returns 0 on success, -1 if allocation or event scheduling fails.
 */
int
init_fma()
{
  struct fms_fma_info *info = F.fma_info;

  /* break the preferred mapper setting into individual names */
  if (F.settings->preferred_mapper[0] != '\0') {
    char **words;
    int word_cnt;

    LF_CALLOC(words, char *, LF_STRING_LEN);
    line2words(F.settings->preferred_mapper, words, ",", LF_STRING_LEN,
               &word_cnt);

    info->preferred_mappers = words;
    info->num_pref_mappers = word_cnt;
  }

  /* arrange for the initial check of which hosts have an FMA connected */
  if (lf_schedule_event(check_fma_connections, NULL,
                        FMS_FMA_FIRST_CONNECT_CHECK_DELAY) == NULL) {
    return -1;
  }

  return 0;

 except:
  return -1;
}

/*
 * Turn a connection into an FMA connection.
 *
 * Allocates the per-FMA descriptor, ties it to the connection and its
 * channel, installs the hangup handler, and arms the channel to receive
 * the first message header.
 *
 * Returns 0 on success, -1 if the descriptor cannot be allocated.
 */
int
init_fma_connection(
  struct fms_connection *fcp)
{
  struct fms_fma_desc *desc = NULL;
  struct lf_channel *chan;

  LF_CALLOC(desc, struct fms_fma_desc, 1);

  /* descriptor remembers both the connection and its channel */
  chan = fcp->chp;
  desc->fcp = fcp;
  desc->chp = chan;

  /* channel callbacks find the descriptor through the context pointer */
  chan->hangup_rtn = fma_hangup;
  chan->context = desc;

  /* wait for the first message header */
  reset_fma_receive(chan);

  return 0;

 except:
  if (desc != NULL) {
    fms_free_adp(desc);
  }
  return -1;
}

/*
 * Free an FMA descriptor and its contents.
 *
 * Releases the message receive buffer and then the descriptor itself.
 * The caller must not touch adp after this returns.
 */
static void
fms_free_adp(
  struct fms_fma_desc *adp)
{
  /* the buffer must be released before the descriptor that points to it */
  LF_FREE(adp->adp_msgbuf);
  LF_FREE(adp);
}

/*
 * Channel callback: a message header from an FMA has been received.
 *
 * Reads the body length from the header.  A zero-length message is handed
 * straight to fms_fma_finish_message(); otherwise the receive buffer is
 * grown if it is too small and the channel is armed to read the body, with
 * fms_fma_finish_message() as the completion routine.
 *
 * On a bad length or an allocation failure the error is reported and the
 * FMA is disconnected.
 */
static void
fms_fma_start_message(
  struct lf_channel *chp)
{
  struct fms_fma_desc *adp;
  int len;

  adp = chp->context;

  /* body length, converted from network byte order */
  len = ntohl(adp->header.length_32);

  /*
   * The length comes straight off the wire.  A value with the top bit set
   * turns negative in an int, is not caught by the "buffer too small" test
   * below, and would then be handed to lf_channel_receive() - refuse it.
   */
  if (len < 0) LF_ERROR(("Bad message length from FMA"));

  /* if message length is 0, just handle it */
  if (len == 0) {
    fms_fma_finish_message(chp);
    return;
  }

  /* make sure message buffer is big enough to hold incoming message */
  if (len > adp->adp_msgbuf_size) {
    void *p;

    /* keep the old pointer until realloc is known to have succeeded */
    p = realloc(adp->adp_msgbuf, len);
    if (p == NULL) LF_ERROR(("Error allocating space for incoming message"));

    adp->adp_msgbuf = (union lf_fma_message *)p;
    adp->adp_msgbuf_size = len;
  }

  /* get the rest of the message */
  lf_channel_receive(chp, adp->adp_msgbuf, len, fms_fma_finish_message);
  return;

 except:
  fms_perror();

  /* detach from this FMA */
  disconnect_from_fma(adp);
}

/*
 * Channel callback: the body (if any) of an FMA message is in the buffer.
 *
 * Trims an oversized receive buffer when the current message is small,
 * re-arms the channel for the next header, and finally dispatches the
 * message.  Failure to shrink the buffer is treated as fatal.
 */
static void
fms_fma_finish_message(
  struct lf_channel *chp)
{
  struct fms_fma_desc *adp = chp->context;
  int msg_type;
  int msg_len;

  /* header fields are in network byte order */
  msg_type = ntohl(adp->header.msg_type_32);
  msg_len = ntohl(adp->header.length_32);

  /*
   * An earlier large message may have left the buffer oversized.  If this
   * message is small, cut the buffer back to twice the message union size.
   */
  if (msg_len < sizeof(union fma_fms_msg)*2) {
    if (adp->adp_msgbuf_size > sizeof(union fma_fms_msg)*2) {
      adp->adp_msgbuf = realloc(adp->adp_msgbuf, sizeof(union fma_fms_msg)*2);
      if (adp->adp_msgbuf == NULL) LF_ERROR(("Error reducing size of msgbuf"));

      adp->adp_msgbuf_size = sizeof(union fma_fms_msg)*2;
    }
  }

  /* arm for the next header before dispatching ... */
  reset_fma_receive(chp);

  /* ... because the handler may dispose of adp, so it must run last */
  fms_handle_message(adp, msg_type, msg_len, adp->adp_msgbuf);

  return;

 except:
  fms_perror_exit(1);
}

/*
 * Decode and dispatch one complete message received from an FMA.
 *
 *   adp    - descriptor of the FMA the message came from
 *   type   - message type taken from the header
 *   length - body length in bytes taken from the header
 *   msg    - the message body
 *
 * Only the ident message has its length checked here.  A bad ident length
 * or an unknown message type is reported and the FMA is disconnected.
 * Because handlers may disconnect the FMA, adp must be considered gone
 * once this returns.
 */
void
fms_handle_message(
  struct fms_fma_desc *adp,
  int type,
  int length,
  union fma_fms_msg *msg)
{
  switch (type) {

  /* FMA identifies itself: hostname and version */
  case FMA_FMS_IDENT_MSG:
    if (length != sizeof(struct fma_fms_ident_msg)) {
      LF_ERROR(("Bad ident msg length"));
    }
    get_fma_ident(adp, &msg->ident);
    break;

  /* FMA describes the host it runs on */
  case FMA_FMS_HOST_MSG:
    get_fma_info(adp, &msg->host);
    break;

  /* FMA says the current map is no good */
  case FMA_FMS_MAP_IS_INVALID:
    fms_fma_reports_invalid_map(adp, &msg->map_invalid);
    break;

  /* FMA delivers a topo map; the body itself is the map */
  case FMA_FMS_MAP_UPLOAD:
    fms_fma_got_map(adp, (struct lf_topo_map *)msg);
    break;

  case FMA_FMS_SEND_INVALID_ROUTE_COMPLETE:
    fms_send_inv_rts_complete(adp, &msg->inv_rt);
    break;

  case FMA_FMS_NIC_ERROR:
    fms_fma_nic_error(adp, &msg->nic_error);
    break;

  case FMA_FMS_NIC_BADCRC:
    fms_fma_nic_badcrc(adp, &msg->nic_badcrc);
    break;

  case FMA_FMS_PROXY_FMA_TO_FMS:
    fms_fma_proxy_message(adp, &msg->proxy_fma_to_fms);
    break;

  default:
    LF_ERROR(("Unknown message type from FMA: %d", type));
    break;
  }
  return;

 except:
  fms_perror();

  /* a misbehaving FMA gets dropped */
  disconnect_from_fma(adp);
}

/*
 * Channel hangup handler for an FMA connection.
 *
 * If the FMA had been associated with a fabric host, raise the "lost FMA"
 * alert for that host, then tear down all state for the FMA.
 */
static void
fma_hangup(
  struct lf_channel *chp)
{
  struct fms_fma_desc *adp = chp->context;
  struct lf_host *hp = adp->fabric_host;

  if (hp != NULL) {
    fms_fma_alert_lost_fma(hp);
  }

  disconnect_from_fma(adp);
}

/*
 * Disconnect from an FMA and destroy all record of its channel.
 *
 * Closes the connection (if any), drops proxy clients, unhooks the FMA
 * from its fabric host and from the fabric-wide "invalid map reporter"
 * and "current mapper" slots, and frees the descriptor.  If this FMA was
 * the one mapping for us, the mapping timeout is cancelled and a new map
 * request is issued.  adp is invalid once this returns.
 */
void
disconnect_from_fma(
  struct fms_fma_desc *adp)
{
  struct lf_fabric *fabric = F.fabvars->fabric;
  struct lf_host *host;

  /* shut the socket side down first, when there is one */
  if (adp->fcp != NULL) {
    close_connection(adp->fcp);
  }

  /* anything being proxied through this FMA goes too */
  fms_remove_all_proxy_clients(adp);

  /* break the host <-> FMA association in both directions */
  host = adp->fabric_host;
  if (host != NULL) {
    FMS_HOST(host)->adp = NULL;
    host->fma_flags &= ~FMA_FLAG_HAS_FMS;	/* no more FMS contact */
    fms_notify(FMS_EVENT_DEBUG, "Disconnecting adp from %s", host->hostname);
    --F.fabvars->num_fmas;
  }

  /* forget this FMA as the reporter of an invalid map */
  if (FMS_FABRIC(fabric)->invalid_map_reporter == adp) {
    FMS_FABRIC(fabric)->invalid_map_reporter = NULL;
  }

  /* our mapper is leaving: cancel its timeout and ask someone else */
  if (FMS_FABRIC(fabric)->request_map_adp == adp) {
    FMS_FABRIC(fabric)->request_map_adp = NULL;
    lf_remove_event(FMS_FABRIC(fabric)->request_map_timeout);
    FMS_FABRIC(fabric)->request_map_timeout = NULL;

    fms_notify(FMS_EVENT_DEBUG,
        "Mapping FMA going away - attempting to re-request");
    fms_fma_request_map(NULL);
  }

  /* finally release the descriptor and its buffer */
  fms_free_adp(adp);
}

/*
 * Handle the ident message from an FMA: record its hostname and make sure
 * it is running the correct version.
 *
 * On a version mismatch the FMA is told to terminate, with the reason.
 * fms_fma_terminate() may dispose of adp, so adp is not used afterwards.
 */
static void
get_fma_ident(
  struct fms_fma_desc *adp,
  struct fma_fms_ident_msg *idp)
{
  size_t max_len;
  int version;

  /*
   * Copy in the hostname.  It comes off the wire and need not be NUL
   * terminated, so bound the copy by the size of both fields and always
   * terminate the local copy.
   */
  max_len = sizeof(adp->hostname) - 1;
  if (max_len > sizeof(idp->hostname)) {
    max_len = sizeof(idp->hostname);
  }
  strncpy(adp->hostname, idp->hostname, max_len);
  adp->hostname[max_len] = '\0';

  /* check version number */
  version = ntohl(idp->fma_version_32);
  if (version == LF_VERSION) {

    fms_notify(FMS_EVENT_DEBUG, "FMA connection from host %s", adp->hostname);

  } else {
    lf_string_t reason;

    /* report using the terminated local copy, not the raw wire field */
    fms_notify(FMS_EVENT_CRITICAL,
        "Bad FMA version %d on host %s", version, adp->hostname);

    /* terminate the FMA and tell it why */
    snprintf(reason, sizeof(reason), "Bad version %d, should be %d",
             version, LF_VERSION);
    fms_fma_terminate(adp, reason);
  }

  return;
}

/*
 * Instruct the FMA to terminate and clean up local state.
 *
 *   adp    - the FMA to terminate
 *   reason - text sent to the FMA explaining why
 *
 * Any association with a fabric host is broken first.  The terminate
 * message is then sent and, if there is a direct channel, further input
 * from the FMA is drained.  If the message cannot be sent the FMA is
 * disconnected, which frees adp - callers must not rely on adp afterwards.
 *
 * NOTE(review): unlike disconnect_from_fma(), this does not clear
 * FMA_FLAG_HAS_FMS on the host - confirm whether that is intended.
 */
void
fms_fma_terminate(
  struct fms_fma_desc *adp,
  lf_string_t reason)
{
  struct lf_fma_terminate msg;
  struct lf_host *hp;
  int rc;

  /* if associated with a host, disconnect now */
  hp = adp->fabric_host;
  if (hp != NULL) {
    FMS_HOST(hp)->adp = NULL;
    adp->fabric_host = NULL;
    --F.fabvars->num_fmas;
  }

  /*
   * Send the reason back to FMA.  The message is zeroed first: strncpy
   * below never writes the final byte of msg.reason, so without this the
   * string could go out unterminated and stack bytes would be sent.
   */
  memset(&msg, 0, sizeof(msg));
  strncpy(msg.reason, reason, sizeof(msg.reason)-1);

  rc = fms_fma_write(adp, LF_FMA_TERMINATE, &msg, sizeof(msg));
  if (rc != 0) {
    LF_ERROR(("Error sending terminate message to FMA"));
  }

  /* from this point, we will be ignoring messages from this FMA */
  /* XXX - start a timeout for closing connection? */
  if (adp->chp != NULL) {
    lf_channel_data_drain(adp->chp);
  }

  return;

 except:
  fms_perror();
  disconnect_from_fma(adp);
  return;
}

/*
 * Handle the host message from an FMA.
 * This is where an FMA becomes associated with a host on the fabric.
 *
 * The host is looked up by the hostname recorded for the FMA and created
 * if it is not known yet.  An existing non-proxy FMA for the same host is
 * considered stale and terminated.  The reported NIC and link information
 * is then reconciled with ours, and the FMA is sent the topo map and the
 * current settings.
 */
static void
get_fma_info(
  struct fms_fma_desc *adp,
  struct fma_fms_host_msg *hmp)
{
  struct lf_host *hp;

  /* find the host in our fabric, or make a new entry for it */
  hp = lf_find_host_by_name(F.fabvars->fabric, adp->hostname);
  if (hp == NULL) {
    hp = fms_add_new_host(adp->hostname);
  }

  /* without a host struct there is nothing more we can do */
  if (hp == NULL) {
    fms_notify(FMS_EVENT_ERROR,
               "Cannot allocate host entry for %s\n", adp->hostname);
    fms_fma_terminate(adp, "Cannot allocate host entry");
    return;
  }

  /*
   * If there is already a connection from this host, consider it
   * stale and terminate it.  This is the right thing because a new one
   * will not start if any Myrinet ports are busy, so the old one
   * must really be kaput.
   */
  if (FMS_HOST(hp)->adp != NULL && adp->proxy == NULL) {
    fms_notify(FMS_EVENT_DEBUG,
               "Duplicate connection from %s", adp->hostname);
    fms_fma_terminate(FMS_HOST(hp)->adp, "Duplicate connection");
  }

  /* tie host and FMA together */
  ++F.fabvars->num_fmas;                /* one more FMA */
  FMS_HOST(hp)->adp = adp;
  FMS_HOST(hp)->fma_seen = TRUE;        /* has been seen at least once */
  FMS_HOST(hp)->disconnected = FALSE;   /* reset this assumption */
  adp->fabric_host = hp;
  hp->fma_flags |= FMA_FLAG_HAS_FMS;    /* this node now has FMS contact */

  /* reset any "missing FMA" alerts */
  fms_fma_alert_present(hp);

  /* reconcile NIC info from database with what is reported by the FMA */
  fms_reconcile_nic_info(hp, hmp);

  /* compare what the host says about its connections to what we think */
  fms_reconcile_link_info(hp, hmp);

  /* send the topo map */
  fms_fma_needs_topo_map(adp);

  /* send settings */
  fms_fma_send_settings(adp);

#if 0
  /* Removed since fabric verification takes care of generating a new
   * map if needed */

  /* if first FMA, request a total scan of the map */
  if (F.fabvars->num_fmas == 1) {
    fms_notify(FMS_EVENT_DEBUG, "Scheduling an initial map scan");
    fms_schedule_map_request(NULL);
  }
#endif
}

/*
 * Put an FMA channel back into "waiting for a message header" state.
 *
 * The next sizeof(struct fms_msg_header) bytes are read into the
 * descriptor's header field, after which fms_fma_start_message() runs.
 */
static void
reset_fma_receive(
  struct lf_channel *chp)
{
  struct fms_fma_desc *desc = chp->context;

  lf_channel_receive(chp, &desc->header,
                     sizeof(struct fms_msg_header), fms_fma_start_message);
}

/*
 * Scheduled once from init_fma(): look at every known host and raise the
 * "no initial FMA" alert for each one that neither has an FMA attached
 * now nor has had one seen since startup.  After this, alerts are driven
 * by connect/disconnect events for each host.
 */
static void
check_fma_connections(
  void *unused)
{
  struct lf_fabric *fabric = F.fabvars->fabric;
  struct lf_host *host;
  int i;

  for (i=0; i<fabric->num_hosts; ++i) {
    host = fabric->hosts[i];

    /* an FMA is attached right now */
    if (FMS_HOST(host)->adp != NULL) continue;

    /* an FMA has been here at some point */
    if (FMS_HOST(host)->fma_seen) continue;

    fms_fma_alert_no_initial_fma(host);
  }

  return;
}

/*
 * Move a NIC to a new host.  This adds a new NIC onto the end of the NIC
 * array for the new host, and collapses the NIC array of the old host to
 * no longer contain the NIC.
 */
void
fms_move_nic(
  struct lf_nic *nicp,
  struct lf_host *hp,
  int nic_id)
{
  int rc;

  /* Remove this NIC from it's old host */
  lf_remove_nic_from_host(nicp);

  /* Add this nic to the new host */
  rc = lf_add_existing_nic_to_host(nicp, hp);
  if (rc != 0) LF_ERROR(("Error adding NIC to different host"));

  /* set the new host_nic_id */
  nicp->host_nic_id = nic_id;

  return;

 except:
  fms_perror_exit(1);
}

/*
 * Reconcile reported NIC info with database and generate alerts for
 * differences where appropriate.
 */
static void
fms_reconcile_nic_info(
  struct lf_host *hp,
  struct fma_fms_host_msg *hmp)
{
  int n;
  int inn;
  struct lf_nic *nicp;
  struct lf_fabric *fp;

  fp = F.fabvars->fabric;		/* get pointer to fabric */

  /* mark NICs on this host as "unseen", then scan info tagging each one */
  for (n=0; n<hp->num_nics; ++n) {
    FMS_NIC(hp->nics[n])->seen = FALSE;


  }

  inn = ntohl(hmp->nic_cnt_32);		/* info # of NICs */

  /* first, just loop through and note any moves (same MAC, different nic_id) */
  for (n=0; n<inn; ++n) {
    struct fma_fms_nic *nip;
    lf_string_t gen_product_id;
    struct lf_nic_def *dp;
    int num_ports;
    int nic_id;

    nip = hmp->nic_array + n;
    nic_id = ntohs(nip->nic_id_16);
    num_ports = ntohl(nip->num_active_ports_32);

    /*
     * See if the productinfo entry for this NIC exists.  If not, convert
     * the reported NIC type to a generic one.
     */
    dp = lf_get_nic_definition(nip->product_id);
    if (dp == NULL) {
      fms_reset_errors();
      lf_make_generic_nic_product_info(num_ports, gen_product_id);
      dp = lf_get_nic_definition(gen_product_id);
      if (dp != NULL) {
	fms_fma_alert_unrecognized_nic_type(hp, nic_id, nip->product_id);
	strcpy(nip->product_id, gen_product_id);
      } else {
	LF_ERROR(("Cannot figure out a NIC type for %s, NIC %d, type=\"%s\"\n",
	      hp->hostname, nic_id, nip->product_id));
      }
    }

    nicp = lf_find_nic_by_mac(fp, nip->mac_addr);
    if (nicp == NULL) continue;         /* skip unknown (new) NICs for now */

    /* If we found this NIC in the wrong host, check for DUP or delete it */
    if (nicp->host != hp) {
      lf_string_t s;

      /* if the hostname for the nic found in the fabric is our
       * special mac-based hostname,
       * then it was just a placeholder - the real hostname was unknown.
       * update hp to point to the nic in the fabric (later, we'll replace
       * the host struct in the fabric with the updated hp)
       */
      lf_make_mac_hostname(s, nip->mac_addr, nicp->num_ports);
      if (strcmp(s, nicp->host->hostname) == 0) {
        struct lf_host *ohp;   /* old host pointer */

        sprintf(s, "NIC " LF_MAC_FORMAT
              " found with unknown hostname, updating to %s:%d",
              LF_MAC_ARGS(nip->mac_addr), hp->hostname, nic_id); 
        fms_notify(FMS_EVENT_INFO, s);

	/* save pointer to the temp host */
        ohp = nicp->host;

	/* Move the NIC from the temp host to this one */
	fms_move_nic(nicp, hp, nic_id);

	fms_update_host_in_db(hp);
#if 0
        /* auto-commit if changed host flag set */
        if (F.fabvars->auto_commit & FMS_AC_CHANGED_HOST) {
  	  fms_commit_host(hp);
        }
#endif

	/* There should be no NICs left on this host */
        if (ohp->num_nics <= 0) {
          lf_remove_host_from_fabric(fp, ohp);

	  /* auto-commit changes to temp hosts */
          fms_remove_host_from_db(ohp);		/* remove from DB */
#if 0
	  fms_commit_host(ohp);
#endif
          fms_free_host(ohp);
        } else {
	  LF_ERROR(("Temp host %s has more than one NIC!", ohp->hostname));
	}

	FMS_NIC(nicp)->seen = TRUE;

      } else {
        /* the DB mac has a valid hostname. If it is still connected to an fma
         * this may be a real dup. If it's not connected to an fma, assume it's
         * just stale data and replace it.
         */
        if (FMS_HOST(nicp->host)->adp != NULL) {
          sprintf(s, "%s reports NIC " LF_MAC_FORMAT ", DB says %s has it also.",
               hp->hostname, LF_MAC_ARGS(nip->mac_addr), nicp->host->hostname);
          fms_notify(FMS_EVENT_INFO, s);

          /* XXX DUP or delete from other host */
          LF_ERROR(("Duplicate NIC"));

	/*
	 * This NIC reports being owned by a host with no FMA.  Steal it.
	 */
        } else {
	  fms_notify(FMS_EVENT_INFO, "Moving NIC %d from %s to %s",
	               nicp->host_nic_id, nicp->host->hostname, hp->hostname);
	  fms_move_nic(nicp, hp, nic_id);
	  FMS_NIC(nicp)->seen = TRUE;

	  /* XXX perform db-fabric updates... */
        }
      }
    }
  }

  for (n=0; n<inn; ++n) {
    struct fma_fms_nic *nip;
    int nic_id;

    nip = hmp->nic_array + n;
    nic_id = ntohs(nip->nic_id_16);
    nicp = lf_find_nic_by_mac_and_host(fp, nip->mac_addr, hp);

    if (nicp == NULL) continue;		/* skip unknown (new) NICs for now */

    /* If any info, note change and auto-commit */
    if (nicp->host_nic_id != nic_id
	|| (nicp->serial_no == NULL
	    || strcmp(nicp->serial_no, nip->serial_no) != 0)
	|| (nicp->product_id == NULL
	    || strcmp(nicp->product_id, nip->product_id) != 0)) {
      lf_string_t s;
      int old_state;

      sprintf(s, "%s: NIC with mac=" LF_MAC_FORMAT " changed.",
	  hp->hostname, LF_MAC_ARGS(nip->mac_addr));
      fms_notify(FMS_EVENT_INFO, s);

      /* remember update_state since fms_change_nic() will change it */
      old_state = FMS_HOST(nicp->host)->update_state;

      /* handle the NIC change */
      nicp = fms_change_nic(nicp, nip);
      if (nicp == NULL) LF_ERROR(("Error changing NIC"));

      fms_update_host_in_db(nicp->host);
#if 0
      /* If no other pending changes, auto-commit this one */
      if (old_state == FMS_UPDATE_NONE) {
	fms_commit_host(nicp->host);
      }
#endif
    }
    FMS_NIC(nicp)->seen = TRUE;	/* mark it as seen */
  }

  /* OK, now go through the list again, paying attention only to those
   * NICs that are *not* found (new)
   */
  for (n=0; n<inn; ++n) {
    struct fma_fms_nic *nip;
    int nic_id;

    nip = hmp->nic_array + n;
    nic_id = ntohs(nip->nic_id_16);
    nicp = lf_find_nic_by_mac(fp, nip->mac_addr);

    if (nicp != NULL) continue;
    nicp = lf_find_nic_by_id(hp, nic_id);

    /* If no NIC currently in this slot, it's new */
    if (nicp == NULL) {
      lf_string_t s;

      sprintf(s, "%s: new nic_id %d, mac=" LF_MAC_FORMAT, 
	  hp->hostname, nic_id, LF_MAC_ARGS(nip->mac_addr));
      fms_notify(FMS_EVENT_INFO, s);

      /* new NIC */
      nicp = fms_add_nic(hp, nip);
      if (nicp == NULL) LF_ERROR(("Error adding new NIC"));

      FMS_NIC(nicp)->seen = TRUE;	/* NIC has been seen */

      fms_update_host_in_db(hp);
#if 0
      /* auto-commit if changed host flag set */
      if (F.fabvars->auto_commit & FMS_AC_CHANGED_HOST) {
	fms_commit_host(nicp->host);
      }
#endif

    /* otherwise, treat this as a changed MAC addr */
    } else {
      lf_string_t s;
      int old_state;

      FMS_NIC(nicp)->seen = TRUE;

      sprintf(s, "%s: nic %d changed mac from " LF_MAC_FORMAT " to "
	  LF_MAC_FORMAT, hp->hostname, nic_id, 
	  LF_MAC_ARGS(nicp->mac_addr), LF_MAC_ARGS(nip->mac_addr));
      fms_notify(FMS_EVENT_INFO, s);

      /* remember update_state since fms_change_nic() will change it */
      old_state = FMS_HOST(nicp->host)->update_state;

      nicp = fms_change_nic(nicp, nip);
      if (nicp == NULL) LF_ERROR(("Error changing NIC"));

      fms_update_host_in_db(hp);

      FMS_NIC(nicp)->seen = TRUE;	/* NIC has been seen */

#if 0
      /* If no other pending changes, auto-commit this one */
      if (old_state == FMS_UPDATE_NONE) {
	fms_commit_host(nicp->host);
      }
#endif
    }
  }

  /*
   * Now, scan through all the NICs listed for the host by the database
   * and note any missing ones.
   */
  for (n=0; n<hp->num_nics; ++n) {

    nicp = hp->nics[n];
    if (!FMS_NIC(nicp)->seen) {
      lf_string_t s;
      struct lf_host *tmp_host;

      sprintf(s, "%s: nic_id %d missing, mac=" LF_MAC_FORMAT, 
	  hp->hostname, nicp->host_nic_id, LF_MAC_ARGS(nicp->mac_addr));
      fms_notify(FMS_EVENT_INFO, s);

      /* missing NIC */
      tmp_host = nicp->host;
      fms_remove_nic(nicp);

      fms_update_host_in_db(tmp_host);
#if 0
      /* auto-commit if changed host flag set */
      if (F.fabvars->auto_commit & FMS_AC_CHANGED_HOST) {
	fms_commit_host(tmp_host);
      }
#endif
    }
  }
  return;

 except:
  fms_perror_exit(1);
}

/*
 * Create a new host in the "current" fabric and record the new host.
 *
 *   hostname - name for the new host
 *
 * The host is added to the fabric, given its FMS-private data, and added
 * to the database.  Returns the new host, or NULL if it could not be
 * added to the fabric.
 */
struct lf_host *
fms_add_new_host(
  char *hostname)
{
  struct lf_fabric *fp;
  struct lf_host *hp;

  fp = F.fabvars->fabric;       /* pointer to current fabric */

  /* add a host struct to the current fabric */
  hp = lf_add_host_to_fabric(fp, hostname);
  if (hp == NULL) LF_ERROR(("Error adding new host"));

  /* allocate FMS-private struct */
  fms_fill_host_private(hp);

  fms_add_host_to_db(hp);
#if 0
  /* make a note of this new hosts added */
  fms_note_new_host(hp);

  /* auto-commit if requested */
  if (F.fabvars->auto_commit & FMS_AC_NEW_HOST) {
    fms_commit_host(hp);
  }
#endif

  fms_notify(FMS_EVENT_INFO, "Added host %s to fabric", hostname);

  return hp;

 except:
  /*
   * The only visible jump here is the failed lf_add_host_to_fabric(),
   * where hp is NULL - do not hand a NULL host to lf_free_host().
   */
  if (hp != NULL) {
    lf_free_host(hp);
  }
  return NULL;
}

/*
 * Replace a NIC with a freshly allocated one built from reported info.
 *
 *   onicp - the NIC being replaced; it is freed by this call
 *   nip   - the NIC description reported by the FMA
 *
 * The new NIC takes over the old one's slot in its host.  Link information
 * is carried over if the number of ports is the same, otherwise the old
 * NIC's links are removed.  The host is tagged as changed and the fabric
 * is marked as changed.
 *
 * Returns the new NIC, or NULL on error.
 */
struct lf_nic *
fms_change_nic(
  struct lf_nic *onicp,
  struct fma_fms_nic *nip)
{
  struct lf_nic *nicp;

  /* allocate a NIC for this product ID */
  nicp = lf_alloc_nic_by_product_id(nip->product_id);
  if (nicp == NULL) LF_ERROR(("Error allocating new NIC"));

  /* link new nic to owning host */
  nicp->slot = onicp->slot;
  nicp->host = onicp->host;
  nicp->host->nics[nicp->slot] = nicp;

  /* Copy over all the new info */
  nicp->host_nic_id = ntohs(nip->nic_id_16);
  LF_MAC_COPY(nicp->mac_addr, nip->mac_addr);
  LF_DUP_STRING(nicp->serial_no, nip->serial_no);

  /* If port count same, just move the links, else delete the links */
  if (nicp->num_ports == onicp->num_ports) {
    fms_move_nic_links(onicp, nicp);
  } else {
    fms_remove_nic_links(onicp);
    /* XXX fms_note_all_links_removed(LF_NODE(nicp)); */
  }

  /* all done with this NIC struct */
  fms_free_nic_private(onicp);
  lf_free_nic(onicp);

  /* create NIC private struct for new NIC */
  fms_fill_nic_private(nicp);

  /* tag this host as changed */
  fms_note_changed_host(nicp->host);

  /* only really need fabric change if links change or MAC addr changes,
   * but simpler just to always do it.  Rare case where it's not needed,
   * and just an optimization not to mark fabric as changed.
   */
  fms_state_fabric_changed(FMS_STATE_FABRIC_CHANGE);

  return nicp;

 except:
  return NULL;
}

/*
 * Remove the topo link on every port of a NIC.
 */
void
fms_remove_nic_links(
  struct lf_nic *nicp)
{
  int port;

  for (port=0; port<nicp->num_ports; ++port) {
    fms_remove_topo_link(LF_NODE(nicp), port);
  }
}

/*
 * Move links from one NIC to another, port for port.
 *
 *   onicp - NIC currently holding the links
 *   nnicp - NIC that takes them over
 *
 * Both the topo link of each port and the physical link of each port's
 * transceiver are re-made in both directions, and the old NIC's side is
 * cleared.  The FMS-private part of the NIC struct is not touched.
 * This cheats a little in knowing that the ref_cnt for the link will not
 * change since we are just moving it.
 */
static void
fms_move_nic_links(
  struct lf_nic *onicp,
  struct lf_nic *nnicp)
{
  int port;

  for (port=0; port<onicp->num_ports; ++port) {
    union lf_node *peer;
    struct lf_xcvr *old_xcvr;
    struct lf_xcvr *new_xcvr;

    /* topo link: point the peer at the new NIC and the new NIC at the peer */
    peer = onicp->topo_ports[port];
    if (peer != NULL) {
      lf_make_topo_link(peer, onicp->topo_rports[port], LF_NODE(nnicp), port);
      lf_make_topo_link(LF_NODE(nnicp), port, peer, onicp->topo_rports[port]);
      onicp->topo_ports[port] = NULL;
    }

    /* physical link: same treatment for the transceiver on this port */
    old_xcvr = LF_XCVR(onicp->phys_ports[port]);
    new_xcvr = LF_XCVR(nnicp->phys_ports[port]);
    peer = old_xcvr->ports[0];

    if (peer != NULL) {
      lf_make_phys_link(peer, old_xcvr->rports[0], LF_NODE(new_xcvr), 0);
      lf_make_phys_link(LF_NODE(new_xcvr), 0, peer, old_xcvr->rports[0]);
      old_xcvr->ports[0] = NULL;
    }
  }
}

/*
 * Remove a NIC from the fabric.
 *
 * The NIC's links are removed, the NIC is detached from its host and
 * freed, and both the host and the fabric are flagged as changed.
 * nicp is invalid once this returns.
 */
void
fms_remove_nic(
  struct lf_nic *nicp)
{
  /* grab the owner now; nicp will be gone by the time it is needed */
  struct lf_host *owner = nicp->host;

  /* links first, then the NIC itself */
  fms_remove_nic_links(nicp);
  lf_remove_nic_from_host(nicp);
  fms_free_nic(nicp);

  /* the host changed, and so did the fabric */
  fms_note_changed_host(owner);
  fms_state_fabric_changed(FMS_STATE_FABRIC_CHANGE);
}

/*
 * Add a NIC to a host.
 *
 *   hp  - the host that gets the NIC
 *   nip - the NIC description reported by the FMA
 *
 * A NIC of the reported product ID is added to the host and filled in
 * with the reported nic_id, MAC address and serial number.  The host is
 * tagged as changed and the fabric is marked as changed.
 *
 * Returns the new NIC, or NULL on error.
 */
struct lf_nic *
fms_add_nic(
  struct lf_host *hp,
  struct fma_fms_nic *nip)
{
  struct lf_nic *nicp;

  /* add NIC to the host struct */
  nicp = lf_add_nic_by_product_id(hp, nip->product_id);
  if (nicp == NULL) LF_ERROR(("Error adding new NIC"));

  /* Copy over all the new info */
  nicp->host_nic_id = ntohs(nip->nic_id_16);
  LF_MAC_COPY(nicp->mac_addr, nip->mac_addr);
  LF_DUP_STRING(nicp->serial_no, nip->serial_no);
  nicp->partition = 0;          /* XXX default partition */

  /* allocate FMS-private struct for the new NIC */
  fms_fill_nic_private(nicp);

  /* tag this host as changed */
  fms_note_changed_host(hp);

  /* this counts as a fabric change */
  fms_state_fabric_changed(FMS_STATE_FABRIC_CHANGE);

  return nicp;

 except:
  return NULL;
}

/*
 * Try to get one particular FMA to map the fabric for us.
 *
 * Only FMAs with a direct channel are eligible.  If the request cannot be
 * written the FMA is disconnected (adp is then gone).  On success the FMA
 * is recorded as the current mapper and as the last mapper, and a timeout
 * of map_request_timeout*1000 is scheduled for the map to arrive.
 *
 * Returns 0 if the request was sent, -1 otherwise.  Failure to schedule
 * the timeout is fatal.
 */
static int
fms_fma_try_request_map(
  struct fms_fma_desc *adp)
{
  struct lf_fabric *fabric = F.fabvars->fabric;
  struct fms_settings *settings = F.settings;

  /* no direct channel, not a candidate */
  if (adp->chp == NULL) {
    return -1;
  }

  /* ask for the map; an FMA we cannot write to gets dropped */
  if (fms_fma_write(adp, LF_FMA_MAP_FABRIC, NULL, 0) == -1) {
    fms_perror();
    disconnect_from_fma(adp);
    return -1;
  }

  /* note who is doing the mapping */
  FMS_FABRIC(fabric)->request_map_adp = adp;
  strcpy(FMS_FABRIC(fabric)->last_mapper, adp->hostname);

  /* the map has to show up before this timer goes off */
  FMS_FABRIC(fabric)->request_map_timeout = lf_schedule_event(
                fms_fma_mapping_timeout,
                adp, settings->map_request_timeout*1000);
  if (FMS_FABRIC(fabric)->request_map_timeout == NULL) {
    LF_ERROR(("Error scheduling mapping timeout"));
  }
  return 0;

 except:
  fms_perror_exit(1);
  return -1;
}

/*
 * Pick an FMA and request a mapping of the fabric from it.
 *
 * Candidates are tried in this order until one accepts the request:
 *   1. each configured preferred mapper, in the order given
 *   2. the FMA that reported the current map as invalid
 *   3. every host in the fabric host list, first to last
 * Hosts flagged as disconnected are skipped throughout.
 *
 * The argument is not used.
 */
void
fms_fma_request_map(
  void *v)
{
  struct fms_fma_desc *adp;
  struct fms_fma_info *info;
  struct lf_fabric *fabric;
  struct lf_host *host;
  int i;

  fabric = F.fabvars->fabric;
  info = F.fma_info;

  /* the scheduled request task, if any, is being serviced now */
  FMS_FABRIC(fabric)->request_map_task = NULL;

  /* 1: preferred mappers, if any were specified */
  for (i=0; i<info->num_pref_mappers; ++i) {
    host = lf_find_host_by_name(fabric, info->preferred_mappers[i]);
    if (host == NULL) continue;

    /* needs an FMA, and must not be marked disconnected */
    adp = FMS_HOST(host)->adp;
    if (adp == NULL || FMS_HOST(host)->disconnected) continue;

    if (fms_fma_try_request_map(adp) == 0) goto done;
  }

  /* 2: whoever told us the map was invalid */
  adp = FMS_FABRIC(fabric)->invalid_map_reporter;
  if (adp != NULL && adp->fabric_host != NULL
      && !FMS_HOST(adp->fabric_host)->disconnected) {
    if (fms_fma_try_request_map(adp) == 0) goto done;
  }

  /* 3: the first usable FMA in the host list */
  adp = NULL;
  for (i=0; i<fabric->num_hosts; ++i) {
    host = fabric->hosts[i];

    if (FMS_HOST(host)->disconnected) continue;

    adp = FMS_HOST(host)->adp;
    if (adp == NULL) continue;

    if (fms_fma_try_request_map(adp) == 0) goto done;
  }

  /* nobody took the request */
  fms_notify(FMS_EVENT_INFO, "No FMAs from which to request map");
  return;

 done:
  fms_notify(FMS_EVENT_INFO, "Requesting map from %s", adp->hostname);
}

/*
 * Scheduled-event handler: the FMA asked to map did not deliver in time.
 * For now the response is to terminate that FMA and ask another one.
 *
 *   vadp - the FMA descriptor the timeout was scheduled for
 */
static void
fms_fma_mapping_timeout(
  void *vadp)
{
  struct fms_fma_desc *adp = vadp;
  struct lf_fabric *fabric = F.fabvars->fabric;

  /* the timer has fired and nobody is mapping any more */
  FMS_FABRIC(fabric)->request_map_timeout = NULL;
  FMS_FABRIC(fabric)->request_map_adp = NULL;

  fms_notify(FMS_EVENT_INFO, "Map timeout on %s", adp->hostname);

  /* get rid of the slow FMA, then look for a replacement mapper */
  fms_fma_terminate(adp, "mapping timeout");
  fms_fma_request_map(NULL);
}

/*
 * We received a map from an FMA, turn it into a fabric and send it out to
 * anyone waiting for a map.
 *
 *   adp  - the FMA that sent the map
 *   topo - the uploaded topo map
 *
 * Any pending mapping timeout is cancelled.  A map with links is processed
 * and a map push is scheduled.  A map without links means the sender is
 * disconnected from the fabric; its host is flagged as such and a map is
 * requested from someone else.
 */
static void
fms_fma_got_map(
  struct fms_fma_desc *adp,
  struct lf_topo_map *topo)
{
  struct lf_fabric *fp;
  struct lf_host *hp;
  int nl;

  /*
   * Get pointer to fabric and clear timeout.  A map can arrive when no
   * timeout is pending (the upload is just another message type an FMA can
   * send), so only remove the event if there is one.
   */
  fp = F.fabvars->fabric;
  if (FMS_FABRIC(fp)->request_map_timeout != NULL) {
    lf_remove_event(FMS_FABRIC(fp)->request_map_timeout);
    FMS_FABRIC(fp)->request_map_timeout = NULL;
  }
  FMS_FABRIC(fp)->request_map_adp = NULL;

  fms_notify(FMS_EVENT_INFO, "Received map from %s", adp->hostname);

  /* If there are some links, process this topo map */
  nl = ntohl(topo->num_links_32);
  if (nl > 0) {

    /* turn this topo map into a fabric */
    fms_process_topo_map(topo);

    /* Arrange for a map to be sent */
    fms_schedule_map_push();

  } else {
    hp = adp->fabric_host;

    fms_notify(FMS_EVENT_INFO, "No links in map, %s is disconnected",
        adp->hostname);
    if (hp != NULL) {
      FMS_HOST(hp)->disconnected = TRUE;
    } else {
      fms_notify(FMS_EVENT_DEBUG, "No host struct for %s?", adp->hostname);
    }

    /* try getting a better map */
    fms_fma_request_map(NULL);
  }
}

/*
 * Handle an NIC error message
 */
static void
fms_fma_nic_error(
  struct fms_fma_desc *adp,
  struct fma_fms_nic_error_msg *msg)
{
  enum myri_error_type error;
  int nic_id;

  error = ntohl(msg->error_32);
  nic_id = ntohl(msg->nic_id_32);

  switch (error) {
    case MYRI_ERROR_HARD_SRAM_ERROR:
    case MYRI_ERROR_SOFT_SRAM_ERROR:
      fms_fma_alert_sram_parity_error(adp->fabric_host, nic_id);
      break;
    case MYRI_ERROR_FIRMWARE_UNRESPONSIVE:
      fms_fma_alert_nic_died(adp->fabric_host, nic_id);
    default:
      break;
  }
}

/*
 * Handle a NIC badcrc message from an FMA.
 *
 * Looks up the reported NIC on the FMA's host and raises the badcrc alert
 * for the reported port and count.  A report naming an unknown nic_id, or
 * one from an FMA that has no host associated with it yet, is logged and
 * dropped.
 */
static void
fms_fma_nic_badcrc(
  struct fms_fma_desc *adp,
  struct fma_fms_nic_badcrc_msg *msg)
{
  struct lf_nic *nicp;
  int nic_id;

  nic_id = ntohl(msg->nic_id_32);

  /* the host is only known once the FMA's host message has been handled */
  if (adp->fabric_host == NULL) {
    fms_notify(FMS_EVENT_ERROR,
        "Bad CRCs for nic_id %d reported from %s with no host struct",
        nic_id, adp->hostname);
    return;
  }

  /* Find this NIC */
  nicp = lf_find_nic_by_id(adp->fabric_host, nic_id);
  if (nicp == NULL) {
    fms_notify(FMS_EVENT_ERROR, "Bad nic_id (%d) reported from %s",
        nic_id, adp->hostname);
    return;
  }

  /* raise an alert */
  fms_fma_alert_nic_badcrc(nicp, ntohl(msg->port_32), ntohl(msg->badcrcs_32));
}

/*
 * Send the current FMS settings to an FMA.
 *
 * Values go out in network byte order; the link verify interval and the
 * NIC query interval are multiplied by 1000 on the way.  If the message
 * cannot be sent the FMA is disconnected, which frees adp.
 */
static void
fms_fma_send_settings(
  struct fms_fma_desc *adp)
{
  struct fms_settings *fsp;
  struct lf_fma_settings msg;
  int rc;

  fsp = F.settings;

  /*
   * Start from a zeroed message so that any field not assigned below, and
   * any padding, goes out as zero rather than as stack contents.
   */
  memset(&msg, 0, sizeof(msg));

  msg.verify_interval_32 = htonl(fsp->link_verify_interval * 1000);
  msg.verify_timeout_32 = htonl(fsp->link_verify_timeout);
  msg.verify_retries_32 = htonl(fsp->link_verify_retries);
  msg.nic_query_interval_32 = htonl(fsp->nic_query_interval * 1000);
  msg.nic_badcrc_threshold_32 = htonl(fsp->lf_badcrc_threshold);
  msg.nic_scout_timeout_32 = htonl(fsp->nic_scout_timeout);
  msg.nic_scout_retries_32 = htonl(fsp->nic_scout_retries);

  /* send settings to FMA */
  rc = fms_fma_write(adp, LF_FMA_SETTINGS, &msg, sizeof(msg));
  if (rc != 0) {
    LF_ERROR(("Error sending settings to FMA"));
  }
  return;

 except:
  fms_perror();
  disconnect_from_fma(adp);
  return;
}

/*
 * Send some data to an FMA.
 * We will write to a socket if directly connected, else use
 * a proxy FMA to forward a message for us.
 *
 *   adp    - the destination FMA
 *   type   - message type placed in the header
 *   buf    - message body, may be NULL when length is 0
 *   length - body length in bytes
 *
 * Returns 0 on success and -1 if the header or body could not be written
 * in full to a direct channel.  Having neither a channel nor a proxy is
 * fatal.
 */
int
fms_fma_write(
  struct fms_fma_desc *adp,
  int type,
  void *buf,
  int length)
{
  int rc;

  /* If we have a direct channel, use that */
  if (adp->chp != NULL) {
    struct lf_fma_msg_header hdr;

    /* send header */
    memset(&hdr, 0, sizeof(hdr));
    hdr.length_32 = htonl(length);
    hdr.msg_type_32 = htonl(type);
    rc = lf_write(adp->chp->fd, &hdr, sizeof(hdr));

    /*
     * Hold the header to the same standard as the body below: anything
     * other than a complete write is a failure, since a partial header
     * would leave the stream out of step.
     */
    if (rc != (int)sizeof(hdr)) {
      return -1;
    }

    /* send message body */
    if (length > 0) {
      rc = lf_write(adp->chp->fd, buf, length);
      if (rc != length) {
        return -1;
      }
    }

  /* No channel struct, use a proxy */
  } else if (adp->proxy != NULL) {
    fms_proxy_write(adp, type, buf, length);

  /* For some reason, we can't talk to this guy... */
  } else {
    LF_ERROR(("No way to talk to FMA on %s", adp->hostname));
  }
  return 0;

 except:
  fms_perror_exit(1);
  return -1;
}

/*
 * An FMA reports that a map is invalid.
 *
 * The report is logged.  If we hold a map and the reported version is the
 * current one, the map is invalidated and the state machine is told,
 * along with the reporting FMA when it asked to be used for the next map.
 * Otherwise the FMA is simply out of date (or we hold no map) and is
 * queued to be sent our topo map.
 */
static void
fms_fma_reports_invalid_map(
  struct fms_fma_desc *adp,
  struct fma_fms_map_invalid_msg *msg)
{
  struct fms_fabric *fmsp = FMS_FABRIC(F.fabvars->fabric);
  int reported_version = ntohl(msg->minv_map_version_32);

  /* the reporter may volunteer to produce the next map */
  struct fms_fma_desc *next_mapper = msg->minv_use_me_8 ? adp : NULL;

  /* with no map at all the report is no news, so log it more quietly */
  if (fmsp->topo_map == NULL) {
    fms_notify(FMS_EVENT_DEBUG, "%s says map %d (curr=none) is invalid: %s",
        adp->hostname, reported_version, msg->why);
  } else {
    fms_notify(FMS_EVENT_INFO, "%s says map %d (curr=%d) is invalid: %s",
        adp->hostname, reported_version, F.fabvars->map_version,
        msg->why);
  }

  /* not about our current map: just bring this FMA up to date */
  if (fmsp->topo_map == NULL || reported_version != F.fabvars->map_version) {
    fms_fma_needs_topo_map(adp);
    return;
  }

  /* the report is about the current map: invalidate it ... */
  fms_fma_invalidate_map();

  /* ... and let the state machine know */
  fms_state_map_is_invalid(next_mapper);
}

